################################################################################
# Replication Code for:
# "Unpacking Populist Secessionism: Elite Discourse and Mass Attitudes 
#  in Republika Srpska, Bosnia and Herzegovina"
# 
# Author: Semir Dzebo
# Affiliation: Central European University
# 
# This script replicates all analyses and figures in the paper
# Last updated: Aug 2, 2025
################################################################################

# Clear workspace
rm(list = ls())

# Set working directory (modify as needed)
# setwd("path/to/replication/folder")

################################################################################
# 1. LOAD REQUIRED PACKAGES
################################################################################

# Install packages if not already installed
required_packages <- c("tidyverse", "stargazer", "gridExtra", "conflicted", "mediation")

for (pkg in required_packages) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}

# After loading all packages, explicitly declare preferences
conflict_prefer("select", "dplyr")
conflict_prefer("filter", "dplyr")

################################################################################
# 2. DATA PREPARATION
################################################################################

# Load data
df <- read_csv("data.csv")

# 2.1 Variable Renaming: Correctly designate reverse-coded variables
df <- df %>%
  rename(
    # Identity variables
    ID2r = ID2, ID5r = ID5, ID6r = ID6,
    # Fear variables  
    F4r = F4, F5r = F5, F6r = F6,
    # Economic variables
    E2r = E2, E3r = E3, E4r = E4,
    # Intergroup relations variables
    IR1r = IR1, IR3r = IR3, IR5r = IR5,
    # Ingroup bias variables
    IB2r = IB2, IB5r = IB5, IB6r = IB6,
    # Devolution variables
    D2r = D2, D4r = D4, D6r = D6,
    # National identity subversion variables
    NIS2r = NIS2, NIS4r = NIS4, NIS6r = NIS6,
    # Entitativity variables
    EN2r = EN2, EN4r = EN4, EN5r = EN5,
    # Right to dissent variables
    RTD1r = RTD1, RTD3r = RTD3, RTD5r = RTD5
  )

# 2.2 Reverse Coding: Recode specified variables (1→5, 2→4, 3→3, 4→2, 5→1)
recode_vars <- c(
  "ID2r", "ID5r", "ID6r", "F4r", "F5r", "F6r", 
  "E2r", "E3r", "E4r", "IR1r", "IR3r", "IR5r", 
  "IB2r", "IB5r", "IB6r", "D2r", "D4r", "D6r", 
  "NIS2r", "NIS4r", "NIS6r", "EN2r", "EN4r", "EN5r", 
  "RTD1r", "RTD3r", "RTD5r", "PPL2", "ANT2", "MAN2"
)

df <- df %>%
  mutate(across(all_of(recode_vars), ~ case_when(
    . == 1 ~ 5,
    . == 2 ~ 4,
    . == 3 ~ 3,
    . == 4 ~ 2,
    . == 5 ~ 1,
    TRUE ~ .
  )))

# 2.3 Missing Data: Convert 98s, 99s and 9998s to NAs
df <- df %>%
  mutate_all(~ ifelse(. %in% c(98, 99, 9998), NA, .))

# 2.4 Scale Construction: Calculate means for each theoretical construct
df <- df %>%
  mutate(
    # Identity-based measures
    identity = rowMeans(select(., c("ID1", "ID2r", "ID3", "ID4", "ID5r", "ID6r")), na.rm = TRUE),
    entitativity = rowMeans(select(., c("EN1", "EN2r", "EN3", "EN4r", "EN5r", "EN6")), na.rm = TRUE),
    ingroup_bias = rowMeans(select(., c("IB1", "IB2r", "IB3", "IB4", "IB5r", "IB6r")), na.rm = TRUE),
    identity_subversion = rowMeans(select(., c("NIS1", "NIS2r", "NIS3", "NIS4r", "NIS5", "NIS6r")), na.rm = TRUE),
    devolution = rowMeans(select(., c("D1", "D2r", "D3", "D4r", "D5", "D6r")), na.rm = TRUE),
    dissent = rowMeans(select(., c("RTD1r", "RTD2", "RTD3r", "RTD4", "RTD5r", "RTD6")), na.rm = TRUE),
    
    # Fear-based measures
    fear = rowMeans(select(., c("F1", "F2", "F3", "F4r", "F5r", "F6r")), na.rm = TRUE),
    intergroup_relations = rowMeans(select(., c("IR1r", "IR2", "IR3r", "IR4", "IR5r", "IR6")), na.rm = TRUE),
    
    # Economic measures
    economy = rowMeans(select(., c("E1", "E2r", "E3r", "E4r", "E5", "E6")), na.rm = TRUE),
    
    # Populism components
    people_centrism = rowMeans(select(., c("PPL1", "PPL2")), na.rm = TRUE),
    anti_elitism = rowMeans(select(., c("ANT1", "ANT2")), na.rm = TRUE),
    manichaeism = rowMeans(select(., c("MAN1", "MAN2")), na.rm = TRUE)
  )

# 2.5 Control Variables
df <- df %>%
  mutate(
    female = if_else(GENDER0 == 2, 1, 0),
    Serb = if_else(BA_NATIONALITY == 2, 1, 0),
    SNSD = if_else(Q2A == 16, 1, 0),
    rural = if_else(STTYPE == 2, 1, 0)
  )

# 2.6 Data Quality: Remove straight-lining cases
survey_cols <- names(df)[7:60] 
df <- df %>%
  filter(apply(select(., all_of(survey_cols)), 1, var, na.rm = TRUE) > 0)

# 2.7 Variable Standardization: Scale all independent variables to 0-1
independent_vars <- c("identity", "fear", "economy", "intergroup_relations", "ingroup_bias",
                      "devolution", "identity_subversion", "entitativity", "dissent",
                      "people_centrism", "anti_elitism", "manichaeism", 
                      "RESP_AGE0", "EDU0", "INCOMEP", "female", "Serb", "SNSD", "rural")

df_standardized <- df %>%
  mutate(across(all_of(independent_vars), ~ (.-min(., na.rm = TRUE)) / 
                  (max(., na.rm = TRUE) - min(., na.rm = TRUE))))

# 2.8 Populism Scale: Create multiplicative measure following Silva et al. (2018)
df_standardized <- df_standardized %>%
  mutate(populism = people_centrism * anti_elitism * manichaeism)

# 2.9 Final Sample: Remove cases with missing data on analytical variables in the main model
model_vars <- c("DV", "identity", "fear", "economy", "intergroup_relations", 
                "ingroup_bias", "devolution", "identity_subversion", "entitativity", 
                "dissent", "populism", "RESP_AGE0", "EDU0", "INCOMEP", "female", "rural")

df_standardized <- df_standardized %>%
  filter(complete.cases(select(., all_of(model_vars))))

# Create square root transformed populism measure for robustness checks
df_standardized <- df_standardized %>%
  mutate(
    populism_sqrt = sqrt(populism),
    populism_sqrt_std = (populism_sqrt - min(populism_sqrt, na.rm = TRUE)) / 
      (max(populism_sqrt, na.rm = TRUE) - min(populism_sqrt, na.rm = TRUE))
  )

################################################################################
# 3. DESCRIPTIVE STATISTICS
################################################################################

# Table 1: Descriptive Statistics (corresponds to Table 1 in the paper)
vars_for_stats <- c("DV", "identity", "entitativity", "ingroup_bias", "identity_subversion", 
                    "devolution", "dissent", "fear", "intergroup_relations", "economy", 
                    "populism", "people_centrism", "anti_elitism", "manichaeism", 
                    "RESP_AGE0", "EDU0", "INCOMEP", "female", "rural", "Serb", "SNSD")

# Create descriptive statistics table
desc_stats <- df_standardized %>%
  select(all_of(vars_for_stats)) %>%
  summarise_all(list(
    Mean = ~ mean(., na.rm = TRUE),
    SD = ~ sd(., na.rm = TRUE),
    SE = ~ sd(., na.rm = TRUE) / sqrt(sum(!is.na(.)))
  )) %>%
  pivot_longer(everything(), names_to = "var_stat", values_to = "value") %>%
  separate(var_stat, into = c("Variable", "Statistic"), sep = "_(?=[^_]+$)") %>%
  pivot_wider(names_from = Statistic, values_from = value)

# Print descriptive statistics 
cat("\n=== DESCRIPTIVE STATISTICS FOR FINAL SAMPLE ===\n")
desc_stats_rounded <- desc_stats %>%
  mutate_if(is.numeric, ~ round(., 3))
print(desc_stats_rounded, n = 25)

# Final sample size
cat(paste("\nFinal analytical sample size: n =", nrow(df_standardized), "\n"))

################################################################################
# 4. MAIN ANALYSES
################################################################################

# 4.1 Research Question 1: Political Support Analysis
# Do populist attitudes and secessionist preferences predict SNSD support?

# Prepare data for logistic regression
df_analysis <- df_standardized %>%
  mutate(
    SNSD_support = if_else(Q2A == 16, 1, 0),
    populism_std = (populism - min(populism, na.rm = TRUE)) / 
      (max(populism, na.rm = TRUE) - min(populism, na.rm = TRUE)),
    DV_standardized = (DV - min(DV, na.rm = TRUE)) / 
      (max(DV, na.rm = TRUE) - min(DV, na.rm = TRUE))
  )

# Logistic regression models
model_pop_snsd <- glm(SNSD_support ~ populism_std, data = df_analysis, family = binomial)
model_sec_snsd <- glm(SNSD_support ~ DV_standardized, data = df_analysis, family = binomial)

# Calculate predicted probability changes
predict_prob_change <- function(model, var_name) {
  new_data <- data.frame(x = c(0, 1))
  names(new_data) <- var_name
  pred_prob <- predict(model, newdata = new_data, type = "response")
  return(pred_prob[2] - pred_prob[1])
}

pop_prob_change <- predict_prob_change(model_pop_snsd, "populism_std")
sec_prob_change <- predict_prob_change(model_sec_snsd, "DV_standardized")

# Calculate one standard deviation probability changes
sd_pop <- sd(df_analysis$populism_std, na.rm = TRUE)
sd_sec <- sd(df_analysis$DV_standardized, na.rm = TRUE)

new_data_pop_sd <- data.frame(populism_std = c(0.5, 0.5 + sd_pop))
new_data_sec_sd <- data.frame(DV_standardized = c(0.5, 0.5 + sd_sec))

pred_prob_pop_sd <- predict(model_pop_snsd, newdata = new_data_pop_sd, type = "response")
pred_prob_sec_sd <- predict(model_sec_snsd, newdata = new_data_sec_sd, type = "response")

# Add the p-value formatting function
format_pval <- function(p) {
  if (p < 0.001) return("< 0.001")
  else if (p < 0.01) return(paste("=", round(p, 3)))
  else if (p < 0.05) return(paste("=", round(p, 3)))
  else return(paste("=", round(p, 3)))
}

# Results
cat("\n=== RESEARCH QUESTION 1: SNSD SUPPORT ANALYSIS ===\n")
cat("POPULISM MODEL:\n")
cat(paste("  Coefficient: β =", round(coef(model_pop_snsd)[2], 3), "\n"))
cat(paste("  Standard Error:", round(summary(model_pop_snsd)$coefficients[2, 2], 3), "\n"))
cat(paste("  p-value:", format_pval(summary(model_pop_snsd)$coefficients[2, 4]), "\n"))
cat(paste("  Min to Max probability change:", round(pop_prob_change, 3), "\n"))
cat(paste("  One SD increase probability change:", round(pred_prob_pop_sd[2] - pred_prob_pop_sd[1], 3), "\n"))
cat("\n")
cat("SECESSIONISM MODEL:\n")
cat(paste("  Coefficient: β =", round(coef(model_sec_snsd)[2], 3), "\n"))
cat(paste("  Standard Error:", round(summary(model_sec_snsd)$coefficients[2, 2], 3), "\n"))
cat(paste("  p-value:", format_pval(summary(model_sec_snsd)$coefficients[2, 4]), "\n"))
cat(paste("  Min to Max probability change:", round(sec_prob_change, 3), "\n"))
cat(paste("  One SD increase probability change:", round(pred_prob_sec_sd[2] - pred_prob_sec_sd[1], 3), "\n"))

# 4.2 Research Questions 2 & 3: Main Regression Models
# Does populist secessionism reflect mass attitudes? How do predictors compare
# to traditional drivers of secessionist sentiment?

# Model 1: Baseline model with demographic controls
model1 <- lm(DV ~ identity + fear + economy + intergroup_relations + 
               ingroup_bias + devolution + identity_subversion + entitativity + 
               dissent + populism + RESP_AGE0 + EDU0 + INCOMEP + female + rural,  
             data = df_standardized)

# Model 2: Add ethnic identity control
model2 <- lm(DV ~ identity + fear + economy + intergroup_relations + 
               ingroup_bias + devolution + identity_subversion + entitativity + 
               dissent + populism + RESP_AGE0 + EDU0 + INCOMEP + female + rural + 
               Serb, data = df_standardized)

# Model 3: Add partisan control
model3 <- lm(DV ~ identity + fear + economy + intergroup_relations + 
               ingroup_bias + devolution + identity_subversion + entitativity + 
               dissent + populism + RESP_AGE0 + EDU0 + INCOMEP + female + rural + 
               Serb + SNSD, data = df_standardized)

# Model 4: Decompose populism into components
model4 <- lm(DV ~ identity + fear + economy + intergroup_relations + 
               ingroup_bias + devolution + identity_subversion + entitativity + 
               dissent + people_centrism + anti_elitism + manichaeism + RESP_AGE0 + 
               EDU0 + INCOMEP + female + rural, data = df_standardized)

# Print model summaries
cat("\n=== RESEARCH QUESTIONS 2 & 3: MAIN REGRESSION RESULTS ===\n")
cat("\nModel 1 - Baseline:\n")
print(summary(model1))
cat("\nModel 4 - Populism Components:\n")
print(summary(model4))

################################################################################
# 5. ROBUSTNESS CHECKS
################################################################################

# 5.1 Research Question 1: SNSD Support Analysis (Square Root Transformed)
# Prepare data with square root transformed populism
df_analysis_sqrt <- df_standardized %>%
  mutate(
    SNSD_support = if_else(Q2A == 16, 1, 0),
    populism_sqrt_std = (populism_sqrt_std - min(populism_sqrt_std, na.rm = TRUE)) / 
      (max(populism_sqrt_std, na.rm = TRUE) - min(populism_sqrt_std, na.rm = TRUE)),
    DV_standardized = (DV - min(DV, na.rm = TRUE)) / 
      (max(DV, na.rm = TRUE) - min(DV, na.rm = TRUE))
  )

# Logistic regression models with transformed populism measure
model_pop_snsd_sqrt <- glm(SNSD_support ~ populism_sqrt_std, data = df_analysis_sqrt, family = binomial)
model_sec_snsd_sqrt <- glm(SNSD_support ~ DV_standardized, data = df_analysis_sqrt, family = binomial)

cat("\n=== ROBUSTNESS CHECK: RESEARCH QUESTION 1 (SQUARE ROOT TRANSFORMATION) ===\n")
cat("POPULISM MODEL (Square Root Transformed):\n")
cat(paste("  Coefficient: β =", round(coef(model_pop_snsd_sqrt)[2], 3), "\n"))
cat(paste("  p-value:", format_pval(summary(model_pop_snsd_sqrt)$coefficients[2, 4]), "\n"))

cat("SECESSIONISM MODEL (unchanged):\n")
cat(paste("  Coefficient: β =", round(coef(model_sec_snsd_sqrt)[2], 3), "\n"))
cat(paste("  p-value:", format_pval(summary(model_sec_snsd_sqrt)$coefficients[2, 4]), "\n"))

# Calculate predicted probability changes for robustness
predict_prob_change_sqrt <- function(model, var_name) {
  new_data <- data.frame(x = c(0, 1))
  names(new_data) <- var_name
  pred_prob <- predict(model, newdata = new_data, type = "response")
  return(pred_prob[2] - pred_prob[1])
}

pop_prob_change_sqrt <- predict_prob_change_sqrt(model_pop_snsd_sqrt, "populism_sqrt_std")
sec_prob_change_sqrt <- predict_prob_change_sqrt(model_sec_snsd_sqrt, "DV_standardized")

cat(paste("Populism (sqrt): Min to Max probability change:", round(pop_prob_change_sqrt, 3), "\n"))
cat(paste("Secessionism: Min to Max probability change:", round(sec_prob_change_sqrt, 3), "\n"))

# Calculate one standard deviation probability changes for robustness
sd_pop_sqrt <- sd(df_analysis_sqrt$populism_sqrt_std, na.rm = TRUE)
sd_sec_sqrt <- sd(df_analysis_sqrt$DV_standardized, na.rm = TRUE)

new_data_pop_sd_sqrt <- data.frame(populism_sqrt_std = c(0.5, 0.5 + sd_pop_sqrt))
new_data_sec_sd_sqrt <- data.frame(DV_standardized = c(0.5, 0.5 + sd_sec_sqrt))

pred_prob_pop_sd_sqrt <- predict(model_pop_snsd_sqrt, newdata = new_data_pop_sd_sqrt, type = "response")
pred_prob_sec_sd_sqrt <- predict(model_sec_snsd_sqrt, newdata = new_data_sec_sd_sqrt, type = "response")

cat(paste("Populism (sqrt): One SD increase probability change:", round(pred_prob_pop_sd_sqrt[2] - pred_prob_pop_sd_sqrt[1], 3), "\n"))
cat(paste("Secessionism: One SD increase probability change:", round(pred_prob_sec_sd_sqrt[2] - pred_prob_sec_sd_sqrt[1], 3), "\n"))

# 5.2 Research Questions 2 & 3: Secessionist Attitudes (Square Root Transformed)
model1_sqrt <- lm(DV ~ identity + fear + economy + intergroup_relations + 
                    ingroup_bias + devolution + identity_subversion + entitativity + 
                    dissent + populism_sqrt_std + RESP_AGE0 + EDU0 + INCOMEP + 
                    female + rural, data = df_standardized)

model2_sqrt <- lm(DV ~ identity + fear + economy + intergroup_relations + 
                    ingroup_bias + devolution + identity_subversion + entitativity + 
                    dissent + populism_sqrt_std + Serb + RESP_AGE0 + EDU0 + INCOMEP + 
                    female + rural, data = df_standardized)

model3_sqrt <- lm(DV ~ identity + fear + economy + intergroup_relations + 
                    ingroup_bias + devolution + identity_subversion + entitativity + 
                    dissent + populism_sqrt_std + Serb + SNSD + RESP_AGE0 + EDU0 + 
                    INCOMEP + female + rural, data = df_standardized)

cat("\n=== ROBUSTNESS CHECK: RESEARCH QUESTIONS 2 & 3 (SQUARE ROOT TRANSFORMATION) ===\n")
cat("All models confirm populism coefficient remains non-significant:\n")
cat(paste("Model 1: β =", round(coef(model1_sqrt)["populism_sqrt_std"], 3), 
          ", p =", round(summary(model1_sqrt)$coefficients["populism_sqrt_std", 4], 3), "\n"))
cat(paste("Model 2: β =", round(coef(model2_sqrt)["populism_sqrt_std"], 3), 
          ", p =", round(summary(model2_sqrt)$coefficients["populism_sqrt_std", 4], 3), "\n"))
cat(paste("Model 3: β =", round(coef(model3_sqrt)["populism_sqrt_std"], 3), 
          ", p =", round(summary(model3_sqrt)$coefficients["populism_sqrt_std", 4], 3), "\n"))

################################################################################
# 6. EXPLORATORY ANALYSES
################################################################################

# 6.1 Anti-Elitism × Education Interaction
model5 <- lm(DV ~ identity + fear + economy + intergroup_relations + 
               ingroup_bias + devolution + identity_subversion + entitativity + 
               dissent + people_centrism + anti_elitism + anti_elitism:EDU0 + 
               manichaeism + RESP_AGE0 + EDU0 + INCOMEP + female + rural,  
             data = df_standardized)

cat("\n=== EXPLORATORY ANALYSIS: ANTI-ELITISM × EDUCATION ===\n")
print(summary(model5))

# 6.2 Mediation Analysis: Anti-Elitism → Ingroup Bias → Secessionism
# Mediator model
med.fit <- lm(ingroup_bias ~ anti_elitism + EDU0 + RESP_AGE0 + INCOMEP + 
                female + rural, data = df_standardized)

# Outcome model
out.fit <- lm(DV ~ identity + fear + economy + intergroup_relations + 
                ingroup_bias + devolution + identity_subversion + entitativity + 
                dissent + people_centrism + anti_elitism + manichaeism + RESP_AGE0 + 
                EDU0 + INCOMEP + female + rural, data = df_standardized)

# Mediation analysis - specify the mediation package explicitly
med.out <- mediation::mediate(med.fit, out.fit, treat = "anti_elitism", 
                              mediator = "ingroup_bias", boot = TRUE, sims = 1000)

cat("\n=== MEDIATION ANALYSIS ===\n")
print(summary(med.out))

################################################################################
# 7. FIGURE GENERATION
################################################################################

# Set consistent theme for all plots
bw_theme <- theme_minimal() +
  theme(
    plot.title = element_text(size = 10),
    panel.grid.major = element_line(color = "gray90", size = 0.1),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "black"),
    text = element_text(color = "black"),
    axis.text = element_text(color = "black"),
    strip.text = element_text(size = 10),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

# 7.1 Figure: Distribution of Populist Attitudes (Appendix)
p1 <- ggplot(df_standardized, aes(x = populism)) +
  geom_histogram(aes(y = ..density..), bins = 30, 
                 fill = "gray90", color = "black", alpha = 0.7) +
  geom_density(color = "black", linewidth = 1) +
  labs(x = "Populism Score", y = "Density",
       title = "A. Original Distribution") +
  bw_theme

p2 <- ggplot(df_standardized, aes(x = populism_sqrt)) +
  geom_histogram(aes(y = ..density..), bins = 30, 
                 fill = "gray90", color = "black", alpha = 0.7) +
  geom_density(color = "black", linewidth = 1) +
  labs(x = "Square Root Transformed Score", y = "Density",
       title = "B. Transformed Distribution") +
  bw_theme

combined_dist <- grid.arrange(p1, p2, ncol = 2)
ggsave("populism_distributions.pdf", combined_dist, width = 10, height = 4)

# 7.2 Figure: SNSD Support by Populism and Secessionism
plot_pop <- ggplot(df_analysis, aes(x = populism_std, y = SNSD_support)) +
  geom_jitter(height = 0.05, width = 0.02, alpha = 0.07, color = "black", size = 0.5) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), 
              se = FALSE, color = "black", linewidth = 1) +
  labs(x = "Populism Score", y = "Probability of Supporting SNSD") +
  bw_theme

plot_sec <- ggplot(df_analysis, aes(x = DV_standardized, y = SNSD_support)) +
  geom_jitter(height = 0.05, width = 0.02, alpha = 0.07, color = "black", size = 0.5) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), 
              se = FALSE, color = "black", linewidth = 1) +
  labs(x = "Secessionism Preference", y = "") +
  bw_theme

combined_snsd <- grid.arrange(plot_pop, plot_sec, ncol = 2)
ggsave("populism_secessionism_snsd_comparison.pdf", combined_snsd, width = 12, height = 6)

# 7.2b Figure: SNSD Support - Robustness Check with Square Root Transformation
plot_pop_sqrt <- ggplot(df_analysis_sqrt, aes(x = populism_sqrt_std, y = SNSD_support)) +
  geom_jitter(height = 0.05, width = 0.02, alpha = 0.07, color = "black", size = 0.5) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), 
              se = FALSE, color = "black", linewidth = 1) +
  labs(x = "Transformed Populism Score", y = "Probability of Supporting SNSD") +
  bw_theme

plot_sec_sqrt <- ggplot(df_analysis_sqrt, aes(x = DV_standardized, y = SNSD_support)) +
  geom_jitter(height = 0.05, width = 0.02, alpha = 0.07, color = "black", size = 0.5) +
  geom_smooth(method = "glm", method.args = list(family = "binomial"), 
              se = FALSE, color = "black", linewidth = 1) +
  labs(x = "Secessionism Preference", y = "") +
  bw_theme

combined_snsd_sqrt <- grid.arrange(plot_pop_sqrt, plot_sec_sqrt, ncol = 2)
ggsave("populism_secessionism_snsd_comparison_sqrt.pdf", combined_snsd_sqrt, width = 12, height = 6)

# 7.3 Figure: Forest Plot of Regression Coefficients (Main Figure)
coef_summary <- summary(model1)$coef
coef_summary <- coef_summary[!is.na(coef_summary[, "Estimate"]), ]

coef_df <- data.frame(
  Variable = row.names(coef_summary),
  Coefficient = coef_summary[, "Estimate"],
  SE = coef_summary[, "Std. Error"]
) %>%
  filter(Variable != "(Intercept)")

# Define variable labels for forest plot
variable_labels <- c(
  "populism" = "Populism", 
  "identity" = "Ethno-regional identification",
  "entitativity" = "Entitativity",
  "ingroup_bias" = "Ingroup bias",
  "identity_subversion" = "Identity subversion",
  "devolution" = "Devolution",
  "dissent" = "Right to dissent",
  "fear" = "Threat perception",
  "intergroup_relations" = "Intergroup Relations",
  "economy" = "Perceived economic grievances",
  "RESP_AGE0" = "Age",
  "EDU0" = "Education",
  "INCOMEP" = "Income",
  "female" = "Female",
  "rural" = "Rural"
)

coef_df$Variable <- variable_labels[coef_df$Variable]
coef_df$Variable <- factor(coef_df$Variable, levels = rev(variable_labels))

forest_plot <- ggplot(coef_df, aes(x = Coefficient, y = Variable)) +
  geom_vline(xintercept = 0, color = "gray50", linetype = "dashed") +
  geom_point(size = 3, color = "black") +
  geom_errorbarh(aes(xmin = Coefficient - 1.96 * SE, xmax = Coefficient + 1.96 * SE), 
                 height = 0, color = "black") +
  labs(x = "Coefficient Estimate", y = "") +
  theme_minimal() +
  theme(
    legend.position = "none",
    panel.grid.major = element_line(color = "gray90"),
    panel.grid.minor = element_blank(),
    axis.line = element_blank(),
    panel.border = element_blank()
  )

ggsave("secession_predictors_plot_model1_populism_top.pdf", 
       forest_plot, width = 12.5, height = 5.5)

# 7.4 Figure: Anti-Elitism × Education Interaction (Appendix)
if (summary(model5)$coefficients["anti_elitism:EDU0", "Pr(>|t|)"] < 0.1) {
  
  # Extract coefficients for interaction plot
  coef_anti_elitism <- coef(model5)["anti_elitism"]
  coef_interaction <- coef(model5)["anti_elitism:EDU0"]
  se_anti_elitism <- summary(model5)$coefficients["anti_elitism", "Std. Error"]
  se_interaction <- summary(model5)$coefficients["anti_elitism:EDU0", "Std. Error"]
  
  # Calculate marginal effects
  calculate_effect <- function(edu) {
    effect <- coef_anti_elitism + coef_interaction * edu
    se <- sqrt(se_anti_elitism^2 + edu^2 * se_interaction^2 + 
                 2 * edu * vcov(model5)["anti_elitism", "anti_elitism:EDU0"])
    lower <- effect - 1.96 * se
    upper <- effect + 1.96 * se
    return(c(effect = effect, lower = lower, upper = upper))
  }
  
  edu_range <- seq(0, 1, length.out = 100)
  effects <- t(sapply(edu_range, calculate_effect))
  
  interaction_data <- data.frame(
    education = edu_range,
    effect = effects[, 1],
    lower = effects[, 2],
    upper = effects[, 3]
  )
  
  interaction_plot <- ggplot(interaction_data, aes(x = education, y = effect)) +
    geom_line() +
    geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
    geom_hline(yintercept = 0, linetype = "dashed") +
    labs(x = "Education Level (scaled 0-1)",
         y = "Marginal Effect of Anti-Elitism on Secessionist Attitudes") +
    theme_minimal()
  
  ggsave("anti_elitism_education_interaction.pdf", interaction_plot, 
         width = 11, height = 6)
}

# 7.5 Figure: Mediation Analysis Diagram (Appendix)
mediation_plot <- ggplot() +
  annotate("label", x = c(1, 2, 3), y = c(1, 2, 1), 
           label = c("Anti-Elitism", "Ingroup Bias", "Secessionist\nAttitudes"),
           size = 5, label.padding = unit(0.5, "lines"), label.size = 0.5) +
  geom_segment(aes(x = 1.2, xend = 1.8, y = 1.1, yend = 1.9), 
               arrow = arrow(length = unit(0.3, "cm"))) +
  geom_segment(aes(x = 2.1, xend = 2.8, y = 1.9, yend = 1.1), 
               arrow = arrow(length = unit(0.3, "cm"))) +
  geom_segment(aes(x = 1.2, xend = 2.8, y = 0.9, yend = 0.9), 
               arrow = arrow(length = unit(0.3, "cm"))) +
  annotate("text", x = 1.2, y = 1.6, label = "a = -.357***", size = 4, hjust = 0) +
  annotate("text", x = 2.5, y = 1.6, label = "b = 2.054***", size = 4, hjust = 0) +
  annotate("text", x = 2, y = 0.8, label = "c' = -.447*", size = 4) +
  theme_void() +
  xlim(0.5, 3.5) +
  ylim(0.5, 2.5)

ggsave("mediation_plot.pdf", mediation_plot, width = 11, height = 5.5)

################################################################################
# 8. TABLE GENERATION
################################################################################

# 8.1 Main Regression Table (for paper)
stargazer(model1, model2, model3, model4,
          type = "latex",
          title = "Predictors of Secessionism in RS, OLS Regression Results",
          dep.var.labels = "Secessionism",
          column.labels = c("(1)", "(2)", "(3)", "(4)"),
          covariate.labels = c(
            "Ethno-regional identification",
            "Threat perception", 
            "Perceived economic grievances",
            "Intergroup Relations",
            "Ingroup bias",
            "Devolution",
            "Identity subversion", 
            "Entitativity",
            "Right to dissent",
            "Populism",
            "People-centrism",
            "Anti-elitism", 
            "Manichaeism",
            "Age",
            "Education",
            "Income", 
            "Female",
            "Rural",
            "Serb",
            "SNSD"
          ),
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          notes = "Standard Errors in parentheses.",
          notes.append = FALSE,
          out = "main_regression_table.tex")

# 8.2 Robustness Check Table (for appendix)
stargazer(model1_sqrt, model2_sqrt, model3_sqrt, model5,
          type = "latex", 
          title = "Robustness Checks: Square Root Transformation and Interactions",
          column.labels = c("Sqrt(1)", "Sqrt(2)", "Sqrt(3)", "Interaction"),
          star.cutoffs = c(0.05, 0.01, 0.001),
          star.char = c("*", "**", "***"),
          out = "robustness_table.tex")

# 8.3 Descriptive Statistics Table
stargazer(as.data.frame(df_standardized[vars_for_stats]), 
          type = "latex",
          title = "Descriptive Statistics of Regression Variables",
          summary.stat = c("mean", "sd"),
          digits = 2,
          out = "descriptive_stats.tex")

################################################################################
# 9. SESSION INFO AND WARNINGS
################################################################################

# Print session information for reproducibility
cat("\nSESSION INFO:\n")
print(sessionInfo())

# Check for any warnings during analysis
if (length(warnings()) > 0) {
  cat("\nWARNINGS ENCOUNTERED:\n")
  print(warnings())
}

################################################################################
# END OF REPLICATION SCRIPT
################################################################################